Esse documento traz os resultados e códigos utilizados nas análises dos fatores que contribuem para a expansão dos casos de covid-19 no estado da Bahia. Esse estudo foi realizado pelo grupo de estudos em ecologia espacial da UFBA, incluindo diversos pesquisadores e laboratórios do Instituo de Biologia.
Se for seguir o código para recriar as análises, antes de inciar, carregue e instale os seguintes pacotes.
library(coronabr) # pode baixar aqui: https://github.com/liibre/coronabr
library(tidyverse)
library(car)
library(randomForest)
library(rgdal) #load map
library(sp) #plot maps
Com o código abaixo podemos baixar os dados para todos os municipios Bahia. Para saber mais sobre as fontes do dados acesse o seguinte link: https://github.com/liibre/coronabr.
covid <- as_tibble(get_corona_br(uf = "BA"))
Pequenos ajustes na tabela:
covid <- covid %>%
filter(place_type == "city") %>%
mutate(city = factor(city, levels = unique(city)))
Dados por municipio:
mun_covid <- covid %>%
filter(date == date[1]) %>%
mutate(afetados = ifelse(confirmed > 0, 1, 0))
Estatísticas dos casos na Bahia:
mun_covid %>%
summarise("Casos totais" = sum(confirmed),
"Mortes totais" = sum(deaths),
"Número de municipios afetados" = sum(confirmed > 0))
Carregando os dados do IBGE e mapbiomas:
ibge <- read_csv2(file("Data/new_ibge.csv", encoding="UTF-8")) %>%
separate(cidade, c("Cidade", "Estado"), sep = "\\(") %>%
mutate(Estado = str_remove(Estado, "\\)")) %>%
filter(Estado == "BA")
## Warning: Missing column names filled in: 'X1' [1]
federal <- read_csv2("Data/federal_w_codes.csv")
## Warning: Missing column names filled in: 'X1' [1]
centralidade <- read_csv2("Data/new.dat.ba.csv")
## Warning: Missing column names filled in: 'X1' [1]
clima <- read_csv2("Data/climatic.br.csv") %>%
mutate(precTotal = rowSums(.[3:7]),
tmean = apply(.[8:17], 1, mean))
## Warning: Missing column names filled in: 'X1' [1]
meso <- read_csv('Data/meso.csv')
colnames(meso)[1] <- 'mesoregiao'
aero <- read_csv2('Data/main.air.ba.csv')
## Warning: Missing column names filled in: 'X1' [1]
Juntando as tabelas:
munis <- left_join(ibge, centralidade, by = c("cod_ibge" = "ibgecode")) %>%
left_join(federal, by = c("cod_ibge" = "ibge")) %>%
left_join(mun_covid, by = c("cod_ibge" = "city_ibge_code")) %>%
left_join(clima, by = c("cod_ibge" = "ibge")) %>%
left_join(meso, by = c("cod_ibge" = "code")) %>%
left_join(aero, by = c("cod_ibge" = "ibge")) %>%
mutate(dens.road = ifelse(is.na(dens.road), 0, dens.road),
afetados = ifelse(is.na(afetados), 0, afetados),
airport = ifelse(is.na(airport), "NO", airport),
confirmed = ifelse(is.na(confirmed), 0, confirmed),
confirmed_per_100k_inhabitants = ifelse(is.na(confirmed_per_100k_inhabitants), 0,
confirmed_per_100k_inhabitants),
tam_pop_urb = total.pop * (1 - perc.rural))
Modelo logístico correlacionando as variáveis com presença ou ausência do vírus. As variáveis foram escolhidas visando diminuir a correlação entre elas e manter a expectativa teorica.
reg_log <- glm(afetados ~
airport + log(total.pop) + perc.rural +
log(eingen.cen.dist) + school.year +
perc.with.wages + dist.min +
tmean + log(precTotal),
data = munis,
family = binomial)
Checar a inflação do modelo:
# VIF
vif(reg_log)
## airport log(total.pop) perc.rural
## 1.275313 1.493799 1.584156
## log(eingen.cen.dist) school.year perc.with.wages
## 1.330059 1.955988 1.147539
## dist.min tmean log(precTotal)
## 1.505226 1.137059 1.501329
Resultado do modelo:
resu <- summary(reg_log)
Adicionado por Anderson (20/04/20)
cor(munis$perc.rural, munis$total.pop)
## [1] -0.2140484
cor(munis$tam_pop_urb, munis$total.pop)
## [1] 0.9987153
cor(munis$tam_pop_urb, munis$perc.rural)
## [1] -0.2287015
Modelo logístico com novas variáveis:
reg_log2 <- glm(afetados ~
airport + log(total.pop)+ perc.rural +
log(eingen.cen.dist) + school.year +
perc.with.wages + dist.min.ilh.ssa +
log(precTotal) + tmean,
data = munis,
family = binomial)
Checar a inflação do modelo:
# VIF
vif(reg_log2)
## airport log(total.pop) perc.rural
## 1.220213 1.660506 1.669023
## log(eingen.cen.dist) school.year perc.with.wages
## 1.502610 1.974139 1.142408
## dist.min.ilh.ssa log(precTotal) tmean
## 1.324672 1.204350 1.161880
Resultado do modelo:
resu2 <- summary(reg_log2)
Modelo logístico com novas variáveis: incluindo renda mensal (month.wages) e removendo variáveis para diminuir o VIF
reg_log3 <- glm(afetados ~
airport + dist.min.ilh.ssa +
log(eingen.cen.dist) +
perc.with.wages + month.wages +
log(precTotal) + tmean,
data = munis,
family = binomial)
Checar a inflação do modelo:
# VIF
vif(reg_log3)
## airport dist.min.ilh.ssa log(eingen.cen.dist)
## 1.279615 1.131919 1.265554
## perc.with.wages month.wages log(precTotal)
## 1.095174 1.657746 1.320686
## tmean
## 1.097639
resu3 <- summary(reg_log3)
Modelo logístico apenas com mesorregiões:
reg_log4 <- glm(afetados ~
mesoregiao,
data = munis,
family = binomial)
Na verdade, um qui-quadrado gourmet:
resu4 <- summary(reg_log4)
Modelo logístico apenas com estrutura da rede de transporte:
sim.roles<-ifelse(munis$roles=="network hub", "hub", munis$roles)
reg_log5 <- glm(afetados ~
log(eingen.cen.dist)+sim.roles+as.factor(module),
data = munis,
family = binomial)
Na verdade, um qui-quadrado gourmet:
resu5 <- summary(reg_log5)
Comparando os modelos com AIC
aic.log <- AIC(reg_log,reg_log2,reg_log3, reg_log4, reg_log5)
## Warning in AIC.default(reg_log, reg_log2, reg_log3, reg_log4, reg_log5): models
## are not all fitted to the same number of observations
aic.log
Recalcular os valores de p por monte carlo, aleatorizando a variavel “afetados”. O modelo escolhido foi o reg_log2 com base no valor de AIC.
rept <- 1000
obs_z <- summary(reg_log2)$coefficients[, 3]
obs <- coefficients(reg_log)
zs <- coefs <- matrix(ncol = length(obs), nrow = rept)
colnames(zs) <- colnames(coefs) <- names(obs)
for (i in 1:rept) {
munis$rnd_afetados <- sample(munis$afetados)
reg_log <- glm(afetados ~
airport + log(total.pop)+ perc.rural +
log(eingen.cen.dist) + school.year +
perc.with.wages + dist.min.ilh.ssa +
log(precTotal) + tmean,
data = munis,
family = binomial)
zs[i, ] <- summary(reg_log)$coefficients[, 3]
coefs[i, ] <- coefficients(reg_log)
}
for (j in 1:length(obs)) {
maior <- (sum(obs[j] >= coefs[, j]) + 1 ) / (rept + 1) * 2
menor <- (sum(obs[j] <= coefs[, j]) + 1) / (rept + 1) * 2
resu$coefficients[j, 4] <- ifelse(maior > menor, menor, maior)
}
resu
##
## Call:
## glm(formula = afetados ~ airport + log(total.pop) + perc.rural +
## log(eingen.cen.dist) + school.year + perc.with.wages + dist.min +
## tmean + log(precTotal), family = binomial, data = munis)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0148 -0.7563 -0.5121 0.7901 2.4703
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.961e+01 5.313e+00 -3.692 0.002 **
## airportYES -1.027e+00 7.215e-01 -1.423 0.002 **
## log(total.pop) 9.928e-01 2.223e-01 4.466 0.002 **
## perc.rural -2.896e+00 8.108e-01 -3.571 0.002 **
## log(eingen.cen.dist) 1.428e-01 9.322e-02 1.532 0.002 **
## school.year -2.032e-01 2.626e-01 -0.774 0.002 **
## perc.with.wages 1.267e+00 2.813e+00 0.451 0.002 **
## dist.min 8.151e-05 3.152e-03 0.026 0.002 **
## tmean 1.458e-01 1.125e-01 1.297 0.002 **
## log(precTotal) 1.296e+00 5.062e-01 2.559 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 508.37 on 404 degrees of freedom
## Residual deviance: 405.50 on 395 degrees of freedom
## (12 observations deleted due to missingness)
## AIC: 425.5
##
## Number of Fisher Scoring iterations: 4
As simulações de monte carlo confirmam os resultados anteriores.
CONCLUSÃO: O modelo indica que municípios com uma população grande e urbana, têm maior probabilidade de serem afetados. Bem como municipios com mais chuva durante os últimos 4 meses.
munis %>% mutate(afetados = ifelse(afetados == 1, "Sim", "Não")) %>%
ggplot(aes(x = afetados ,
y = total.pop)) +
geom_violin() +
geom_jitter(alpha = .5) +
scale_y_log10() +
xlab("Município afetado") +
ylab("Tamanho população")
munis %>% mutate(afetados = ifelse(afetados == 1, "Sim", "Não")) %>%
ggplot(aes(x = afetados ,
y = perc.rural)) +
geom_violin() +
geom_jitter(alpha = .5) +
xlab("Município afetado") +
ylab("Porcentagem população rural")
munis %>% mutate(afetados = ifelse(afetados == 1, "Sim", "Não")) %>%
ggplot(aes(x = afetados,
y = precTotal)) +
geom_violin() +
geom_jitter(alpha = .5) +
scale_y_log10() +
xlab("Município afetado") +
ylab("Precipitação")
Principais fatores contribuindo para a quantidade de casos, nas cidades afetadas.
reg_N <- lm(log(confirmed_per_100k_inhabitants+1) ~
airport + log(total.pop) + perc.rural +
log(eingen.cen.dist) + school.year +
perc.with.wages + dist.min +
precTotal + tmean,
data = filter(munis, confirmed > 0))
vif(reg_N)
## airport log(total.pop) perc.rural
## 1.468365 3.002980 2.164975
## log(eingen.cen.dist) school.year perc.with.wages
## 1.784824 3.494782 1.293024
## dist.min precTotal tmean
## 1.831519 1.735719 1.224695
resu <- summary(reg_N)
resu
##
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport +
## log(total.pop) + perc.rural + log(eingen.cen.dist) + school.year +
## perc.with.wages + dist.min + precTotal + tmean, data = filter(munis,
## confirmed > 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.33924 -0.60499 -0.08731 0.53679 2.34398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.8014874 2.4320154 2.797 0.00602 **
## airportYES 0.3294023 0.3587640 0.918 0.36038
## log(total.pop) -0.3350959 0.1313915 -2.550 0.01202 *
## perc.rural -1.0643024 0.5243482 -2.030 0.04459 *
## log(eingen.cen.dist) -0.0458574 0.0672630 -0.682 0.49670
## school.year 0.0812434 0.1574984 0.516 0.60692
## perc.with.wages -0.5236581 2.0210922 -0.259 0.79600
## dist.min -0.0019814 0.0020672 -0.958 0.33974
## precTotal 0.0008158 0.0006226 1.310 0.19259
## tmean -0.0568844 0.0710695 -0.800 0.42506
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8569 on 120 degrees of freedom
## Multiple R-squared: 0.2089, Adjusted R-squared: 0.1496
## F-statistic: 3.522 on 9 and 120 DF, p-value: 0.0006673
O resultado preliminar aponta para um efeito negativo da população sobre o número de casos a cada 100 mil habitantes, porém pode ser apenas um efeito matemático. Para avaliar isso, recalculamos abaixo os valore de p usando uma simulação da Monte Carlo, onde o numero de casos foi aleatorizado e foi recalculada o número de casos por 100k habitantes.
rept <- 1000
obs <- coef(reg_N)
coefs <- matrix(ncol = length(obs), nrow = rept)
colnames(coefs) <- names(obs)
for (i in 1:rept) {
munis$rnd_cases <- (sample(munis$confirmed) / munis$total.pop) * 1000
while(sum(munis$airport[munis$rnd_cases > 0] == "YES") < 2) {
munis$rnd_cases <- (sample(munis$confirmed) / munis$total.pop) * 1000
}
reg_N <- lm(log(rnd_cases+1) ~
airport + log(total.pop) + perc.rural +
log(eingen.cen.dist) + school.year +
perc.with.wages + dist.min +
precTotal + tmean,
data = filter(munis, rnd_cases > 0))
coefs[i, ] <- coef(reg_N)
}
for (j in 1:length(obs)) {
maior <- (sum(obs[j] >= coefs[, j]) + 1 ) / (rept + 1) * 2
menor <- (sum(obs[j] <= coefs[, j]) + 1) / (rept + 1) * 2
resu$coefficients[j, 4] <- ifelse(maior > menor, menor, maior)
}
resu
##
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport +
## log(total.pop) + perc.rural + log(eingen.cen.dist) + school.year +
## perc.with.wages + dist.min + precTotal + tmean, data = filter(munis,
## confirmed > 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.33924 -0.60499 -0.08731 0.53679 2.34398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.8014874 2.4320154 2.797 0.0020 **
## airportYES 0.3294023 0.3587640 0.918 0.1918
## log(total.pop) -0.3350959 0.1313915 -2.550 0.0480 *
## perc.rural -1.0643024 0.5243482 -2.030 0.0020 **
## log(eingen.cen.dist) -0.0458574 0.0672630 -0.682 0.2298
## school.year 0.0812434 0.1574984 0.516 0.5734
## perc.with.wages -0.5236581 2.0210922 -0.259 0.5754
## dist.min -0.0019814 0.0020672 -0.958 0.1119
## precTotal 0.0008158 0.0006226 1.310 0.0599 .
## tmean -0.0568844 0.0710695 -0.800 0.2178
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8569 on 120 degrees of freedom
## Multiple R-squared: 0.2089, Adjusted R-squared: 0.1496
## F-statistic: 3.522 on 9 and 120 DF, p-value: 0.0006673
Mesmo calculando o p-valor usando simulação, o efeito da população se mantém. Como pode ser visto abaixo (em cinza, coeficientes simulados, em vermelho o valor observado para o tamanho da população).
ggplot(tibble(coefs = coefs[, 3]), aes(x = coefs)) +
geom_histogram(bins = 30) +
geom_vline(aes(xintercept = obs[3]), color = "red") +
ggtitle("Efeito do tamanho da população") +
ylab("Frequência") +
xlab("Coeficientes")
munis %>% filter(confirmed > 0) %>%
ggplot(aes(y = log(confirmed_per_100k_inhabitants+1),
x = log(total.pop))) +
geom_point()
Principais fatores contribuindo para a quantidade de casos, nas cidades afetadas.
reg_N2 <- lm(log(confirmed_per_100k_inhabitants + 1) ~
airport + perc.rural +
log(eingen.cen.dist) + school.year +
perc.with.wages + dist.min +
precTotal + tmean + log(tam_pop_urb) +
mesoregiao + dist.min.ilh.ssa,
data = filter(munis, confirmed > 0))
vif(reg_N2)
## GVIF Df GVIF^(1/(2*Df))
## airport 1.569591 1 1.252833
## perc.rural 3.329593 1 1.824717
## log(eingen.cen.dist) 2.163480 1 1.470877
## school.year 4.976555 1 2.230819
## perc.with.wages 1.528771 1 1.236435
## dist.min 3.118893 1 1.766039
## precTotal 4.648730 1 2.156091
## tmean 2.808240 1 1.675780
## log(tam_pop_urb) 4.948147 1 2.224443
## mesoregiao 114.339735 6 1.484282
## dist.min.ilh.ssa 4.320646 1 2.078616
resu2 <- summary(reg_N2)
resu2
##
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport +
## perc.rural + log(eingen.cen.dist) + school.year + perc.with.wages +
## dist.min + precTotal + tmean + log(tam_pop_urb) + mesoregiao +
## dist.min.ilh.ssa, data = filter(munis, confirmed > 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.37100 -0.47795 -0.01916 0.39335 1.94104
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.124e+00 2.579e+00 1.599 0.112601
## airportYES 3.068e-01 3.127e-01 0.981 0.328703
## perc.rural -1.857e-02 5.482e-01 -0.034 0.973036
## log(eingen.cen.dist) -1.344e-01 6.243e-02 -2.153 0.033467
## school.year 5.479e-01 1.584e-01 3.458 0.000768
## perc.with.wages -4.859e-01 1.853e+00 -0.262 0.793608
## dist.min 5.223e-03 2.274e-03 2.297 0.023472
## precTotal -4.052e-05 8.590e-04 -0.047 0.962464
## tmean -1.593e-01 9.073e-02 -1.756 0.081835
## log(tam_pop_urb) -2.075e-01 1.181e-01 -1.757 0.081628
## mesoregiaoCentro-Sul Baiano 3.934e-01 2.981e-01 1.320 0.189618
## mesoregiaoExtremo Oeste Baiano 4.951e-01 7.720e-01 0.641 0.522599
## mesoregiaoMetropolitana de Salvador -3.159e-01 3.518e-01 -0.898 0.371048
## mesoregiaoNordeste Baiano -4.290e-01 2.832e-01 -1.515 0.132656
## mesoregiaoSul Baiano 9.187e-01 3.668e-01 2.505 0.013681
## mesoregiaoVale São-Franciscano da Bahia 1.288e+00 4.376e-01 2.943 0.003951
## dist.min.ilh.ssa -3.561e-03 8.335e-04 -4.273 4.05e-05
##
## (Intercept)
## airportYES
## perc.rural
## log(eingen.cen.dist) *
## school.year ***
## perc.with.wages
## dist.min *
## precTotal
## tmean .
## log(tam_pop_urb) .
## mesoregiaoCentro-Sul Baiano
## mesoregiaoExtremo Oeste Baiano
## mesoregiaoMetropolitana de Salvador
## mesoregiaoNordeste Baiano
## mesoregiaoSul Baiano *
## mesoregiaoVale São-Franciscano da Bahia **
## dist.min.ilh.ssa ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7224 on 113 degrees of freedom
## Multiple R-squared: 0.4706, Adjusted R-squared: 0.3956
## F-statistic: 6.277 on 16 and 113 DF, p-value: 8.601e-10
Reajustando o modelo 2 para diminuir o vif ou variáveis redundantes (removendo popurbana, distmin,meso e escolaridade)
reg_N3 <- lm(log(confirmed_per_100k_inhabitants + 1) ~
airport + perc.rural +
log(eingen.cen.dist) +
perc.with.wages +
precTotal + tmean +
dist.min.ilh.ssa,
data = filter(munis, confirmed > 0))
vif(reg_N3)
## airport perc.rural log(eingen.cen.dist)
## 1.169427 1.624219 1.287333
## perc.with.wages precTotal tmean
## 1.261770 1.268491 1.105814
## dist.min.ilh.ssa
## 1.161556
resu3 <- summary(reg_N3)
resu3
##
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport +
## perc.rural + log(eingen.cen.dist) + perc.with.wages + precTotal +
## tmean + dist.min.ilh.ssa, data = filter(munis, confirmed >
## 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7965 -0.5618 -0.1265 0.5295 1.8302
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.8221712 2.1269787 2.737 0.00712 **
## airportYES 0.2087902 0.3019778 0.691 0.49062
## perc.rural -0.5308106 0.4283633 -1.239 0.21766
## log(eingen.cen.dist) -0.1509441 0.0538791 -2.802 0.00592 **
## perc.with.wages -0.0873838 1.8830835 -0.046 0.96306
## precTotal 0.0013393 0.0005020 2.668 0.00867 **
## tmean -0.1687760 0.0636953 -2.650 0.00912 **
## dist.min.ilh.ssa -0.0022338 0.0004835 -4.620 9.58e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8082 on 122 degrees of freedom
## Multiple R-squared: 0.2845, Adjusted R-squared: 0.2435
## F-statistic: 6.932 on 7 and 122 DF, p-value: 6.047e-07
Modelo somente mesorregiões
reg_N4 <- lm(log(confirmed_per_100k_inhabitants + 1) ~
mesoregiao,
data = filter(munis, confirmed > 0))
Resultado
resu4 <- summary(reg_N4)
resu4
##
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ mesoregiao,
## data = filter(munis, confirmed > 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.96845 -0.55649 -0.02645 0.49207 1.91919
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.8792 0.2008 9.359 4.77e-16
## mesoregiaoCentro-Sul Baiano 0.4831 0.2517 1.919 0.0572
## mesoregiaoExtremo Oeste Baiano -0.8986 0.5053 -1.778 0.0778
## mesoregiaoMetropolitana de Salvador 0.2289 0.2759 0.830 0.4083
## mesoregiaoNordeste Baiano -0.2896 0.2839 -1.020 0.3098
## mesoregiaoSul Baiano 1.0256 0.2367 4.332 3.03e-05
## mesoregiaoVale São-Franciscano da Bahia 0.2833 0.3478 0.815 0.4169
##
## (Intercept) ***
## mesoregiaoCentro-Sul Baiano .
## mesoregiaoExtremo Oeste Baiano .
## mesoregiaoMetropolitana de Salvador
## mesoregiaoNordeste Baiano
## mesoregiaoSul Baiano ***
## mesoregiaoVale São-Franciscano da Bahia
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8031 on 123 degrees of freedom
## Multiple R-squared: 0.2877, Adjusted R-squared: 0.253
## F-statistic: 8.282 on 6 and 123 DF, p-value: 1.539e-07
Modelo somente estrutura da rede de transporte
reg_N5 <- lm(log(confirmed_per_100k_inhabitants + 1) ~
as.factor(module)+log(eingen.cen.dist)+roles,
data = filter(munis, confirmed > 0))
vif(reg_N5)
## GVIF Df GVIF^(1/(2*Df))
## as.factor(module) 1.335475 5 1.029351
## log(eingen.cen.dist) 1.214092 1 1.101858
## roles 1.502064 3 1.070158
Resultado
resu5 <- summary(reg_N5)
resu5
##
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ as.factor(module) +
## log(eingen.cen.dist) + roles, data = filter(munis, confirmed >
## 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.62071 -0.65374 -0.08344 0.52352 2.05712
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.523754 0.316205 4.819 4.27e-06 ***
## as.factor(module)2 0.465253 0.310408 1.499 0.13654
## as.factor(module)3 0.100976 0.397312 0.254 0.79982
## as.factor(module)4 0.168103 0.254437 0.661 0.51008
## as.factor(module)5 0.816464 0.230969 3.535 0.00058 ***
## as.factor(module)6 -0.008503 0.262474 -0.032 0.97421
## log(eingen.cen.dist) -0.133580 0.055505 -2.407 0.01762 *
## roleshub 0.340170 0.494808 0.687 0.49311
## rolesnetwork hub 1.633375 0.674310 2.422 0.01692 *
## rolesperipheral -0.182835 0.214891 -0.851 0.39656
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8574 on 120 degrees of freedom
## Multiple R-squared: 0.2081, Adjusted R-squared: 0.1487
## F-statistic: 3.504 on 9 and 120 DF, p-value: 0.0007013
Modelo incluindo renda mensal (month.wages)
reg_N6 <- lm(log(confirmed_per_100k_inhabitants + 1) ~
airport + dist.min.ilh.ssa +
log(eingen.cen.dist) +
perc.with.wages + month.wages +
log(precTotal) + tmean,
data = munis)
Checar a inflação do modelo:
# VIF
vif(reg_N6)
## airport dist.min.ilh.ssa log(eingen.cen.dist)
## 1.224698 1.205719 1.357233
## perc.with.wages month.wages log(precTotal)
## 1.145097 1.793628 1.346964
## tmean
## 1.104156
resu6 <- summary(reg_N6)
resu6
##
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport +
## dist.min.ilh.ssa + log(eingen.cen.dist) + perc.with.wages +
## month.wages + log(precTotal) + tmean, data = munis)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3021 -0.6624 -0.2519 0.5176 3.3679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.4180348 1.8999086 -2.325 0.020554 *
## airportYES -0.1286528 0.3072251 -0.419 0.675620
## dist.min.ilh.ssa -0.0019580 0.0003442 -5.688 2.50e-08 ***
## log(eingen.cen.dist) 0.0241005 0.0368376 0.654 0.513338
## perc.with.wages -0.4109089 1.1797772 -0.348 0.727804
## month.wages 0.0021503 0.0004911 4.378 1.53e-05 ***
## log(precTotal) 0.6991059 0.2035196 3.435 0.000655 ***
## tmean 0.0227614 0.0452863 0.503 0.615516
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.055 on 397 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.2356, Adjusted R-squared: 0.2221
## F-statistic: 17.48 on 7 and 397 DF, p-value: < 2.2e-16
AIC
AIC(reg_N,reg_N3,reg_N4, reg_N5, reg_N6)
## Warning in AIC.default(reg_N, reg_N3, reg_N4, reg_N5, reg_N6): models are not
## all fitted to the same number of observations
covid.day <- get_corona_br(
dir = "output",
filename = "corona_brasil",
cidade = NULL,
uf = 'BA',
ibge_cod = NULL,
by_uf = FALSE
)
covid.day <- data.frame(covid.day,
afetados = ifelse(covid.day$confirmed > 0, 1, 0))
covid.day <- left_join(ibge, centralidade, by = c("cod_ibge" = "ibgecode")) %>%
left_join(federal, by = c("cod_ibge" = "ibge")) %>%
left_join(covid.day, by = c("cod_ibge" = "city_ibge_code")) %>%
left_join(clima, by = c("cod_ibge" = "ibge")) %>%
left_join(meso, by = c("cod_ibge" = "code")) %>%
mutate(dens.road = ifelse(is.na(dens.road), 0, dens.road),
afetados = ifelse(is.na(afetados), 0, afetados),
airport = ifelse(is.na(airport), "NO", airport),
confirmed = ifelse(is.na(confirmed), 0, confirmed),
confirmed_per_100k_inhabitants = ifelse(is.na(confirmed_per_100k_inhabitants),
0,
confirmed_per_100k_inhabitants))
cod <- unique(covid.day$cod_ibge)
tempo <- data.frame(matrix(ncol = 2, nrow = length(cod)))
colnames(tempo) <- c('cod_ibge', 'tempo_1')
tempo[, 1] <- cod
for(i in 1:length(cod)){
tab.cod <- data.frame(covid.day[covid.day$cod_ibge == cod[i], ])
tab.cod <- tab.cod[order(tab.cod$date), ]
if(sum(tab.cod$confirmed > 0) > 0){ # Mudar de acordo com a qtd de casos
primeiro <- tab.cod[min(which(tab.cod$confirmed > 0)), 'date']
baseline <- as.Date.character('2020-03-06')
tempo [i, 2] <- as.numeric(difftime(primeiro, baseline, 'days'))
}
if(sum(tab.cod$confirmed > 0) == 0){
tempo [i, 2] <- NA
}
}
munis <- merge(munis, tempo, by = 'cod_ibge',
all.x = T, all.y = F, sort = F)
Principais fatores contribuindo para a quantidade de casos Removi distmin porque é redundante com dis.min.ilh.ssa Removi meso por causa do vif alto
reg_T <- lm(tempo_1 ~
airport +
log(eingen.cen.dist) + school.year +
perc.with.wages +
precTotal + tmean +
dist.min.ilh.ssa,
data = filter(munis, confirmed > 0))
vif(reg_T)
## airport log(eingen.cen.dist) school.year
## 1.215042 1.492712 1.756332
## perc.with.wages precTotal tmean
## 1.237328 1.142433 1.159400
## dist.min.ilh.ssa
## 1.098192
resu <- summary(reg_T)
resu
##
## Call:
## lm(formula = tempo_1 ~ airport + log(eingen.cen.dist) + school.year +
## perc.with.wages + precTotal + tmean + dist.min.ilh.ssa, data = filter(munis,
## confirmed > 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.8453 -6.9123 -0.8611 7.9989 25.1059
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 110.740264 27.040035 4.095 7.61e-05 ***
## airportYES -2.469370 4.079199 -0.605 0.546
## log(eingen.cen.dist) 0.376812 0.768872 0.490 0.625
## school.year -6.178689 1.395587 -4.427 2.09e-05 ***
## perc.with.wages -21.477892 24.712282 -0.869 0.386
## precTotal -0.002585 0.006314 -0.410 0.683
## tmean -0.695397 0.864319 -0.805 0.423
## dist.min.ilh.ssa -0.001173 0.006230 -0.188 0.851
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.71 on 122 degrees of freedom
## Multiple R-squared: 0.2631, Adjusted R-squared: 0.2208
## F-statistic: 6.223 on 7 and 122 DF, p-value: 3.046e-06
Para isso usamos um random forest.
# data_mod2 <- data_mod %>% na.omit() %>%
# mutate(total.pop = log(total.pop),
# eingen.cen.dist = log(eingen.cen.dist),
# afetados = as.factor(afetados),
# airport = as.factor(airport))
#
# reg_log <- randomForest(afetados ~
# airport + total.pop + perc.rural +
# eingen.cen.dist + school.year +
# perc.with.wages + dist.min,
# data = data_mod2,
# importance = TRUE)
Fazer um mapa de vulnerabilidade aqui.
Calcular a taxa
# USAR MESMA ESTRATEGIA DO OUTRO ARTIGO
Gráfico de todos os municipios
cols <- ifelse(levels(covid$city) == "Salvador", "black", "gray")
covid %>%
ggplot(aes(x = date, y = confirmed, color = city)) +
scale_color_manual(values = cols) +
geom_line() +
ggtitle("Casos de COVID-19 por cidade da Bahia") +
ylab("Casos confirmados") +
xlab("Data") +
theme_classic() +
theme(legend.position = "none") +
geom_text(aes(x = date[1] - 8, y = 300,
label = "Salvador"), color = "black")
covid <- as_tibble(get_corona_br(uf = "BA"))
Pequenos ajustes na tabela:
covid.ba <- covid %>%
filter(place_type == "state")
covid.ci <- covid %>%
filter(place_type == "city")
Segunda abordagem para crescimento exponencial
gm_mean <- function(x, na.rm = TRUE){
exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
}
r_calc <- function(x) {
gm_mean(x[2:length(x)] / x[1:(length(x)-1)])
}
Taxa de crescimento Bahia
casos.ba <- covid.ba$confirmed[nrow(covid.ba):1] #backwards
tempo.ba <- 1:length(casos.ba)
head(covid.ba)
exp.ba<- lm(log(casos.ba)~tempo.ba)
tax.ba<-coef(exp.ba)[2]
r.time.b<- data.frame(tempo=tempo.ba,
confirmados=casos.ba,
data=covid.ba$date[nrow(covid.ba):1],
taxa=NA,
r.squ=NA,
taxa2=NA) #segunda abordagem
for (i in 5:length(casos.ba))
{
exp.temp<-lm(log(casos.ba[1:i])~tempo.ba[1:i])
r.time.b[i,4]<-coef(exp.temp)[2]
r.time.b[i,5]<-summary(exp.temp)$r.squared
r.time.b[i,6]<-r_calc(casos.ba[1:i])
}
Correlação entre as duas abordagens
cor.test(r.time.b[,4], r.time.b[,6])
##
## Pearson's product-moment correlation
##
## data: r.time.b[, 4] and r.time.b[, 6]
## t = 20.722, df = 46, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9127250 0.9720364
## sample estimates:
## cor
## 0.9503889
Taxa de crescimento Bahia
plot(r.time.b$data, r.time.b$taxa,
bty="l", pch=19, ylim=c(0,0.3),
xlab="Data", ylab="Taxa Bahia")
Taxa de crescimento Municípios
city.codes<-na.omit(unique(covid.ci$city_ibge_code))
city.n<-length(city.codes)
w=1
list.ci<-list()
for (w in 1:city.n)
{
covid.temp <- covid.ci %>%
filter(city_ibge_code == city.codes[w])
n.temp<-nrow(covid.temp)
casos.temp<-covid.temp$confirmed[n.temp:1]
tempo.temp<-1:n.temp
list.ci[[w]]<-data.frame(tempo=tempo.temp, #tempo
casos=casos.temp) #casos
}
names(list.ci)<-city.codes
n.obs.ci<-sapply(list.ci, nrow) #tamanho das séries temporais
#filtrando cidades com no mínimo dez dias com corona
new.list.ci<-list.ci[n.obs.ci>9]
#Ajustado modelo exponencial para cada munícipio
exp.ci<-log(tempo)~casos #equação
mod.exp.ci<-lapply(new.list.ci, lm, formula=exp.ci) #modelo exponencial
coe.exp.ci<-lapply(mod.exp.ci, coef) #coeficientes
r.ci<-sapply(coe.exp.ci, "[", 2) #só a inclinação ("taxa r")
sum.exp.ci<-lapply(mod.exp.ci, summary) #sumário dos modelos
r.squ.ci<-sapply(sum.exp.ci, "[", "r.squared")
r.squ.ci<-unlist(r.squ.ci)
#ajustando na segunda abordagem
r.ci2<-numeric()
for (i in 1:length(new.list.ci))
{
r.ci2[i]<-r_calc(new.list.ci[[i]][,2])
}
# Montando a planilha
r.mun.dat<-data.frame(city_ibge_code=names(new.list.ci),
taxa=r.ci,
r.square=r.squ.ci,
taxa2=r.ci2)
only80<-r.mun.dat %>%
filter(r.square>0.8)
cor.test(only80[,2], only80[,4])
##
## Pearson's product-moment correlation
##
## data: only80[, 2] and only80[, 4]
## t = NaN, df = 11, p-value = NA
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## NaN NaN
## sample estimates:
## cor
## NaN
ba.shp<-readOGR("Data/bahia.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/brunovilela/OneDrive/Papers/COVID19-Bahia/Data/bahia.shp", layer: "bahia"
## with 417 features
## It has 2 fields
x.ibge<-match(ba.shp$CD_GEOCMU, munis$cod_ibge)
x.ibge2<-match(ba.shp$CD_GEOCMU, filter(munis, confirmed > 0)$cod_ibge)
ba.shp$mesoregiao<-munis$mesoregiao[x.ibge]
ba.shp$module<-munis$module[x.ibge]
ba.shp$total.pop<-log(munis$total.pop)[x.ibge]
ba.shp$perc.rural<-munis$perc.rural[x.ibge]*100
ba.shp$dist.min.ilh.ssa<-munis$dist.min.ilh.ssa[x.ibge]
ba.shp$precTotal<-log(munis$precTotal)[x.ibge]
ba.shp$afetados<-munis$afetados[x.ibge]
ba.shp$res_casos<-resid(reg_N3)[x.ibge2]
ba.shp$pred_afetados<-predict(reg_log2, type="response")[x.ibge]
sp::spplot(ba.shp, "mesoregiao", main="Messoregiões")
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
sp::spplot(ba.shp, "module", main="Módulos")
sp::spplot(ba.shp, "total.pop", main="Log Tamanho da População")
sp::spplot(ba.shp, "perc.rural", main="% População Rural")
sp::spplot(ba.shp, "dist.min.ilh.ssa", main="Distância de Aeroportos")
sp::spplot(ba.shp, "afetados", main="Municípios Afetados")
sp::spplot(ba.shp, "res_casos", main="Resíduos Casos")
sp::spplot(ba.shp, "pred_afetados", main="Probabilidade Afetados")